#!/bin/bash
# Fine-tune a NewsQA-pretrained BERT seq2seq question-generation model on DuoRC,
# resuming from an existing checkpoint, across 3 GPUs via torch.distributed.launch.
set -euo pipefail

# Training data (DuoRC question-generation examples).
TRAIN_FILE=/data/question_generation/duorc/train.json
# Folder used to save fine-tuned checkpoints.
OUTPUT_DIR=/data/question_generation/models/newsqa_att_lm_duorc_ft_deeply
# Folder used to cache package dependencies.
CACHE_DIR=/data/question_generation/cache_s2s_duorc_ft
# Checkpoint weights to continue fine-tuning from (50k-step NewsQA/DuoRC model).
MODEL_PATH=/data/question_generation/models/newsqa_att_lm_duorc_ft/ckpt-50000/pytorch_model.bin
readonly TRAIN_FILE OUTPUT_DIR CACHE_DIR MODEL_PATH

# One worker process per visible GPU (3 total).
export CUDA_VISIBLE_DEVICES=0,1,3
python -m torch.distributed.launch --nproc_per_node=3 run_seq2seq.py \
  --train_file "${TRAIN_FILE}" --output_dir "${OUTPUT_DIR}" \
  --model_type bert --model_name_or_path "${MODEL_PATH}" \
  --config_name bert-base-uncased --tokenizer_name bert-base-uncased \
  --do_lower_case --fp16 --fp16_opt_level O2 \
  --max_source_seq_length 404 --max_target_seq_length 28 \
  --per_gpu_train_batch_size 16 --gradient_accumulation_steps 1 --save_steps 10000 \
  --learning_rate 7e-5 --num_warmup_steps 500 --num_training_steps 50000 \
  --cache_dir "${CACHE_DIR}"